# Importing Libraries
# fpp2 attaches the forecasting toolchain used below (forecast, ggplot2, fma, expsmooth).
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2 3.4.3 ✔ fma 2.5
## ✔ forecast 8.21.1 ✔ expsmooth 2.3
##
library(readr)
# Read 60 monthly GLD (gold ETF) closing prices. Per the column spec below,
# only Date and Close carry data; ...3 and ...4 are empty logical columns.
GOLD <- read_csv("GLD_Final.csv")
## New names:
## • `` -> `...3`
## • `` -> `...4`
## Rows: 60 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (1): Close
## lgl (2): ...3, ...4
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Build a monthly time series from the Close column, starting December 2018
# (60 observations -> through November 2023).
Gold_Raw <- GOLD$Close
Gold_ts <- ts(Gold_Raw, frequency = 12, start = c(2018,12))
plot(Gold_ts)
#### 1. Show a time series plot
# NOTE(review): the window() start equals the series start, so this is a
# no-op copy of Gold_ts — kept for the named handle used throughout.
gold_time_series <- window(Gold_ts, start = c(2018,12))
plot(gold_time_series)
### 2. Please summaries your observations of the times series plot
# We see that for the time series plot, the gold prices rise tremendously from 2019 to mid 2020 followed by a series of rise and falls after and then eventually rising to the highest.
summary(gold_time_series)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 121.2 149.1 166.7 161.9 177.4 188.8
# The min value is : 121.2
# The max value is : 188.8
# The Mean value is : 161.9
# The median value is : 166.7
# The 1st and 3rd quartile values are : 149.1 and 177.4 respectively.
boxplot(gold_time_series, main = "Box Plot of Gold prices Data (from 2018)")
hist(gold_time_series, main = "Histogram Plot of Gold prices Data (from 2018)")
Acf(gold_time_series, main = "Acf of Gold prices Data (from 2018)")
# STL decomposition with a periodic seasonal window (seasonal pattern assumed
# identical across years); classical decompose() shown alongside for the
# additive seasonal indices.
stl_decomp <- stl(gold_time_series,s.window ="periodic")
plot(stl_decomp, main = 'Decomposition plot')
decom <- decompose(gold_time_series)
decom$type
## [1] "additive"
# Monthly seasonal indices (Dec, Jan, ..., Nov given the Dec start);
# the largest positive indices fall in late spring / summer months.
decom$figure
## [1] 0.04823710 0.96188338 -2.70030316 -0.03499122 2.76417765 3.94511537
## [7] 0.14365490 3.54521961 3.74386415 -3.03572394 -3.75832841 -5.62280544
# In June, it is a holiday season for many students and, since everyone has some time off, they tend to buy certain items for their vacation. So it can be a good time to buy some gold to wear on vacation. It can also be a good wedding season because the weather is not too cold.
# Overlay the seasonally adjusted series (STL trend + remainder) on the raw data.
# NOTE(review): `main` has no effect inside lines(); the title comes from the
# preceding plot() call.
plot(gold_time_series)
lines(seasadj(stl_decomp), main = "Seasonally adjusted plot", col="Green")
# Naive forecast: every forecast equals the last observed value.
gold_naive = naive(gold_time_series)
plot(gold_naive, main = "Naive Forecast")
plot(gold_naive$residuals, main = "Naive forecast residuals plot for gold data")
hist(gold_naive$residuals, main ='Histogram of Residuals')
# Fitted vs residuals scatter (first residual is NA, hence the warning below).
cbind(Fitted = fitted(gold_naive),
Residuals=residuals(gold_naive)) %>%
as.data.frame() %>%
ggplot(aes(x=Fitted, y=Residuals)) + geom_point()
## Warning: Removed 1 rows containing missing values (`geom_point()`).
#Inferences: The graph shows Heteroscedasticity which means that the variance of the residuals is not constant across the range of fitted values. The values are also random.
cbind(Actual = gold_time_series,
Residuals=residuals(gold_naive)) %>%
as.data.frame() %>%
ggplot(aes(x=Actual, y=Residuals)) + geom_point()
## Warning: Removed 1 rows containing missing values (`geom_point()`).
#Inferences : The Actual vs residuals plot is very similar to the fitted vs residuals. The graph shows Heteroscedasticity which means that the variance of the residuals is not constant across the range of fitted values. The values are also random.
Acf(gold_naive$residuals, main = "ACF of Naive residuals")
# Inferences : Values of the Acf have not crossed the confidence level meaning there is no trend in the residuals.
naive_accuracy <- accuracy(gold_naive)
naive_accuracy
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.144068 6.916201 5.537627 0.6631645 3.352577 0.3161084 -0.1301814
# Default forecast() on the raw series (chooses an ETS model internally).
forecast(gold_time_series)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Dec 2023 188.2288 177.8746 198.5831 172.3934 204.0643
## Jan 2024 188.2288 174.2293 202.2284 166.8183 209.6393
## Feb 2024 188.2288 171.3498 205.1078 162.4147 214.0430
## Mar 2024 188.2288 168.8911 207.5665 158.6543 217.8033
## Apr 2024 188.2288 166.7084 209.7493 155.3161 221.1415
## May 2024 188.2288 164.7246 211.7331 152.2822 224.1755
## Jun 2024 188.2288 162.8930 213.5647 149.4810 226.9766
## Jul 2024 188.2288 161.1826 215.2750 146.8653 229.5924
## Aug 2024 188.2288 159.5718 216.8858 144.4017 232.0559
## Sep 2024 188.2288 158.0446 218.4130 142.0660 234.3916
## Oct 2024 188.2288 156.5888 219.8688 139.8396 236.6180
## Nov 2024 188.2288 155.1951 221.2625 137.7081 238.7495
## Dec 2024 188.2288 153.8558 222.6018 135.6599 240.7978
## Jan 2025 188.2288 152.5649 223.8927 133.6856 242.7721
## Feb 2025 188.2288 151.3172 225.1404 131.7774 244.6802
## Mar 2025 188.2288 150.1086 226.3491 129.9289 246.5287
## Apr 2025 188.2288 148.9353 227.5223 128.1346 248.3231
## May 2025 188.2288 147.7943 228.6633 126.3896 250.0680
## Jun 2025 188.2288 146.6831 229.7746 124.6901 251.7676
## Jul 2025 188.2288 145.5991 230.8585 123.0323 253.4253
## Aug 2025 188.2288 144.5405 231.9172 121.4132 255.0444
## Sep 2025 188.2288 143.5053 232.9523 119.8301 256.6275
## Oct 2025 188.2288 142.4920 233.9656 118.2804 258.1772
## Nov 2025 188.2288 141.4993 234.9584 116.7621 259.6955
plot(forecast(gold_time_series))
# Centered moving averages of order 3, 6 and 9; higher orders smooth more
# but track the series less closely.
mavg_forecast3 = ma(gold_time_series,order=3)
mavg_forecast6 = ma(gold_time_series,order=6)
mavg_forecast9 = ma(gold_time_series,order=9)
plot(gold_time_series, main = "Plot along with moving averages")
lines(mavg_forecast3, col="Red")
lines(mavg_forecast6, col="Blue")
lines(mavg_forecast9, col="Green")
#Inferences : MAV of 3 is a better overlapping forecast as compared to other ones.
# Forecast from the smoothed MA(3) series; ma() leaves NAs at both ends,
# which triggers the warning below.
MA3_forecast <- forecast(mavg_forecast3, h = 12)
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
plot(MA3_forecast)
# Simple exponential smoothing (level only — no trend or seasonal component).
ses_data <- ses(gold_time_series)
plot(ses_data)
#Ans : 121.59 (initial level l)
#Ans : 6.9394. Sigma defines the variance in the forecast predicted.
summary(ses_data)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = gold_time_series)
##
## Smoothing parameters:
## alpha = 0.8983
##
## Initial states:
## l = 121.59
##
## sigma: 6.9394
##
## AIC AICc BIC
## 482.0925 482.5210 488.3755
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 1.234966 6.822766 5.484528 0.7166804 3.319371 0.3130774
## ACF1
## Training set -0.03335681
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Dec 2023 188.1548 179.2616 197.0480 174.5538 201.7558
## Jan 2024 188.1548 176.2001 200.1095 169.8717 206.4379
## Feb 2024 188.1548 173.7764 202.5333 166.1649 210.1448
## Mar 2024 188.1548 171.7059 204.6037 162.9984 213.3112
## Apr 2024 188.1548 169.8685 206.4412 160.1883 216.1214
## May 2024 188.1548 168.1995 208.1102 157.6357 218.6739
## Jun 2024 188.1548 166.6597 209.6500 155.2808 221.0288
## Jul 2024 188.1548 165.2230 211.0866 153.0837 223.2259
## Aug 2024 188.1548 163.8712 212.4384 151.0163 225.2933
## Sep 2024 188.1548 162.5908 213.7188 149.0581 227.2516
plot(ses_data$residuals, main = "Simple Smoothing forecast residuals plot")
hist(ses_data$residuals, main ='Histogram of Residuals')
cbind(Fitted = fitted(ses_data),
Residuals=residuals(ses_data)) %>%
as.data.frame() %>%
ggplot(aes(x=Fitted, y=Residuals)) + geom_point()
#Inferences : The Actual vs residuals plot is very similar to the fitted vs residuals. The graph shows Heteroscedasticity which means that the variance of the residuals is not constant across the range of fitted values. The values are also random.
cbind(Actual = gold_time_series,
Residuals=residuals(ses_data)) %>%
as.data.frame() %>%
ggplot(aes(x=Actual, y=Residuals)) + geom_point()
#Inferences : The Actual vs residuals plot is very similar to the fitted vs residuals. The graph shows Heteroscedasticity which means that the variance of the residuals is not constant across the range of fitted values. The values are also random.
# NOTE(review): the plot title says "Naive" but these are the SES residuals —
# title string left unchanged here.
Acf(ses_data$residuals, main = "ACF of Naive residuals")
#Inferences : The values have not crossed the confidence intervals & there is no seasonality.
ses_accuracy <- accuracy(ses_data)
ses_accuracy
## ME RMSE MAE MPE MAPE MASE
## Training set 1.234966 6.822766 5.484528 0.7166804 3.319371 0.3130774
## ACF1
## Training set -0.03335681
forecast(ses_data)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Dec 2023 188.1548 179.2616 197.0480 174.5538 201.7558
## Jan 2024 188.1548 176.2001 200.1095 169.8717 206.4379
## Feb 2024 188.1548 173.7764 202.5333 166.1649 210.1448
## Mar 2024 188.1548 171.7059 204.6037 162.9984 213.3112
## Apr 2024 188.1548 169.8685 206.4412 160.1883 216.1214
## May 2024 188.1548 168.1995 208.1102 157.6357 218.6739
## Jun 2024 188.1548 166.6597 209.6500 155.2808 221.0288
## Jul 2024 188.1548 165.2230 211.0866 153.0837 223.2259
## Aug 2024 188.1548 163.8712 212.4384 151.0163 225.2933
## Sep 2024 188.1548 162.5908 213.7188 149.0581 227.2516
plot(forecast(ses_data))
#1. How good is the accuracy? : The ME, RMSE values are very high which tells us that this method may not be the optimal one.
#2. What does it predict the value of time series will be in one year? : 188.1548 (Sept 2024)
#3. Other observation : Simple smoothing forecast may not be a right way to forecast. We can consider more forecasting techniques and check if the error values are less than this one.
# Holt-Winters with additive seasonality (level + trend + seasonal component).
HW_forecast <- hw(gold_time_series, seasonal = "additive")
plot(forecast(HW_forecast))
attributes(HW_forecast)
## $names
## [1] "model" "mean" "level" "x" "upper" "lower"
## [7] "fitted" "method" "series" "residuals"
##
## $class
## [1] "forecast"
hw_add <- forecast(HW_forecast)
hw_add$model
## Holt-Winters' additive method
##
## Call:
## hw(y = gold_time_series, seasonal = "additive")
##
## Smoothing parameters:
## alpha = 0.9262
## beta = 2e-04
## gamma = 1e-04
##
## Initial states:
## l = 121.6981
## b = 1.5747
## s = -5.2835 -4.7104 -3.8007 4.2063 4.4507 0.7968
## 4.4918 3.4419 -0.5184 -3.1621 1.166 -1.0784
##
## sigma: 7.1693
##
## AIC AICc BIC
## 497.4285 511.9999 533.0323
#Inferences : (Pending)
#1 What is the value of alpha? What does that value signify?
#Ans : 0.9262 . Alpha specifies the coefficient for the level smoothing in Holtwinters.
#2. What is the value of beta? What does that value signify?
#Ans : 2e-04 . Beta specifies the coefficient for the trend smoothing in Holtwinters.
#3.What is the value of gamma? What does that value signify?
#Ans : 1e-04 . Gamma specifies the coefficient for the seasonal smoothing in Holtwinters.
#4. What is the value of initial states for the level, trend and seasonality? What do these values signify?
#Ans : l = 121.6981 , b = 1.5747 , s = -5.2835 -4.7104 -3.8007 4.2063 4.4507 0.7968 4.4918 3.4419 -0.5184 -3.1621 1.166 -1.0784
#5.What is the value of sigma? What does the sigma signify?
#Ans : 7.1693 . Sigma defines the variance of the forecast values.
# NOTE(review): the title string says "Simple Smoothing" but this plots the
# Holt-Winters residuals — title text left unchanged here.
plot(hw_add$residuals, main = "Simple Smoothing forecast residuals plot")
#Inferences : The residuals do have a mean at zero for a lot of values as compared to previous ones. This looks like the best case.
hist(hw_add$residuals, main ='Histogram of Residuals')
#Inferences : The data looks to be normally distributed.
cbind(Fitted = fitted(hw_add),
Residuals=residuals(hw_add)) %>%
as.data.frame() %>%
ggplot(aes(x=Fitted, y=Residuals)) + geom_point()
#Inferences : The Fitted vs Residuals plot is random.
cbind(Actual = gold_time_series,
Residuals=residuals(hw_add)) %>%
as.data.frame() %>%
ggplot(aes(x=Actual, y=Residuals)) + geom_point()
#Inferences : The Actual vs Residuals plot is random.
# NOTE(review): the title says "Naive" but these are the Holt-Winters
# residuals — title string left unchanged here.
Acf(hw_add$residuals, main = "ACF of Naive residuals")
#Inferences : From the Acf plot, we can see that none of the values crossed the confidence levels. This shows us that the forecast is good as compared to the others.
hw_accuracy <- accuracy(hw_add)
hw_accuracy
## ME RMSE MAE MPE MAPE MASE
## Training set -0.403236 6.139429 4.862737 -0.2999758 2.996916 0.2775832
## ACF1
## Training set -0.01395089
#1.Time series value for next year. Show table and plot
forecast(hw_add)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Dec 2023 194.1929 185.0050 203.3807 180.1413 208.2444
## Jan 2024 198.0070 185.4826 210.5315 178.8525 217.1616
## Feb 2024 195.2511 180.1074 210.3948 172.0908 218.4114
## Mar 2024 199.4645 182.0914 216.8376 172.8946 226.0344
## Apr 2024 204.9952 185.6473 224.3432 175.4051 234.5854
## May 2024 207.6176 186.4780 228.7573 175.2873 239.9479
## Jun 2024 205.4947 182.7033 228.2861 170.6383 240.3512
## Jul 2024 210.7186 186.3868 235.0504 173.5063 247.9309
## Aug 2024 212.0452 186.2644 237.8260 172.6169 251.4735
## Sep 2024 205.6099 178.4569 232.7629 164.0830 247.1368
## Oct 2024 206.2726 177.8131 234.7321 162.7476 249.7977
## Nov 2024 207.2689 177.5599 236.9780 161.8329 252.7050
## Dec 2024 213.0457 182.1369 243.9545 165.7748 260.3166
## Jan 2025 216.8599 184.7961 248.9236 167.8226 265.8971
## Feb 2025 214.1039 180.9250 247.2828 163.3612 264.8466
## Mar 2025 218.3173 184.0592 252.5754 165.9241 270.7105
## Apr 2025 223.8481 188.5434 259.1527 169.8543 277.8419
## May 2025 226.4704 190.1491 262.7918 170.9217 282.0192
## Jun 2025 224.3475 187.0368 261.6583 167.2857 281.4094
## Jul 2025 229.5715 191.2966 267.8463 171.0351 288.1078
## Aug 2025 230.8980 191.6824 270.1136 170.9230 290.8731
## Sep 2025 224.4627 184.3282 264.5973 163.0822 285.8433
## Oct 2025 225.1255 184.0922 266.1587 162.3705 287.8804
## Nov 2025 226.1218 184.2088 268.0347 162.0214 290.2221
plot(forecast(hw_add))
#Arima
#1. Is Time Series data Stationary? How did you verify? #2. How many differences are needed to make it stationary?
# ndiffs() estimates how many first differences are required for stationarity.
ndiffs(gold_time_series)
## [1] 1
#Inferences : this has a ndiff of 1 which means that the time series data is not stationary and needs one difference to become stationary. It also means that the series shows a trend.
#3. Is Seasonality component needed?
#Ans : yes, since it is non stationary, seasonality is needed.
#4. Plot the Time Series chart of the differenced series. #5. Plot the ACF and PACF plot of the differenced series.
# tsdisplay shows the series with its ACF and PACF in one panel.
tsdisplay(gold_time_series)
#6. Based on the ACF and PACF, which are the possible ARIMA model possible?
#7. Show the AIC, BIC and Sigma^2 for the possible models?
#8. Based on the above AIC, BIC and Sigma^2 values, which model will you select?
#9. What is the final formula for ARIMA with the coefficients?
# Exhaustive (non-stepwise) search over ARIMA orders; trace = TRUE prints the
# AICc of every candidate below. Best model: ARIMA(0,1,0), i.e. a random walk.
auto_fit <- auto.arima(gold_time_series, trace = TRUE, stepwise = FALSE)
##
## ARIMA(0,1,0) : 397.7012
## ARIMA(0,1,0) with drift : 398.2084
## ARIMA(0,1,0)(0,0,1)[12] : 399.2171
## ARIMA(0,1,0)(0,0,1)[12] with drift : 399.9654
## ARIMA(0,1,0)(1,0,0)[12] : 399.2799
## ARIMA(0,1,0)(1,0,0)[12] with drift : 400.0174
## ARIMA(0,1,0)(1,0,1)[12] : 401.2839
## ARIMA(0,1,0)(1,0,1)[12] with drift : 402.113
## ARIMA(0,1,1) : 399.2421
## ARIMA(0,1,1) with drift : 399.3949
## ARIMA(0,1,1)(0,0,1)[12] : 400.5367
## ARIMA(0,1,1)(0,0,1)[12] with drift : 400.9205
## ARIMA(0,1,1)(1,0,0)[12] : 400.6476
## ARIMA(0,1,1)(1,0,0)[12] with drift : 401.032
## ARIMA(0,1,1)(1,0,1)[12] : 402.6594
## ARIMA(0,1,1)(1,0,1)[12] with drift : 403.1111
## ARIMA(0,1,2) : 401.4528
## ARIMA(0,1,2) with drift : 401.6859
## ARIMA(0,1,2)(0,0,1)[12] : 402.8318
## ARIMA(0,1,2)(0,0,1)[12] with drift : 403.3007
## ARIMA(0,1,2)(1,0,0)[12] : 402.9365
## ARIMA(0,1,2)(1,0,0)[12] with drift : 403.4168
## ARIMA(0,1,2)(1,0,1)[12] : 405.0491
## ARIMA(0,1,2)(1,0,1)[12] with drift : 405.5629
## ARIMA(0,1,3) : 403.7541
## ARIMA(0,1,3) with drift : 403.8629
## ARIMA(0,1,3)(0,0,1)[12] : 405.2226
## ARIMA(0,1,3)(0,0,1)[12] with drift : 405.6441
## ARIMA(0,1,3)(1,0,0)[12] : 405.3276
## ARIMA(0,1,3)(1,0,0)[12] with drift : 405.7608
## ARIMA(0,1,3)(1,0,1)[12] : 407.5144
## ARIMA(0,1,3)(1,0,1)[12] with drift : 407.8528
## ARIMA(0,1,4) : 405.7631
## ARIMA(0,1,4) with drift : 406.3386
## ARIMA(0,1,4)(0,0,1)[12] : Inf
## ARIMA(0,1,4)(0,0,1)[12] with drift : 408.1668
## ARIMA(0,1,4)(1,0,0)[12] : Inf
## ARIMA(0,1,4)(1,0,0)[12] with drift : 408.2936
## ARIMA(0,1,5) : 407.9685
## ARIMA(0,1,5) with drift : 408.1579
## ARIMA(1,1,0) : 399.23
## ARIMA(1,1,0) with drift : 399.4321
## ARIMA(1,1,0)(0,0,1)[12] : 400.5258
## ARIMA(1,1,0)(0,0,1)[12] with drift : 400.9757
## ARIMA(1,1,0)(1,0,0)[12] : 400.6301
## ARIMA(1,1,0)(1,0,0)[12] with drift : 401.0748
## ARIMA(1,1,0)(1,0,1)[12] : 402.6608
## ARIMA(1,1,0)(1,0,1)[12] with drift : 403.189
## ARIMA(1,1,1) : 401.4483
## ARIMA(1,1,1) with drift : Inf
## ARIMA(1,1,1)(0,0,1)[12] : 402.8302
## ARIMA(1,1,1)(0,0,1)[12] with drift : 403.2918
## ARIMA(1,1,1)(1,0,0)[12] : 402.9335
## ARIMA(1,1,1)(1,0,0)[12] with drift : 403.4114
## ARIMA(1,1,1)(1,0,1)[12] : 405.0487
## ARIMA(1,1,1)(1,0,1)[12] with drift : 405.5997
## ARIMA(1,1,2) : 403.7434
## ARIMA(1,1,2) with drift : Inf
## ARIMA(1,1,2)(0,0,1)[12] : 405.2092
## ARIMA(1,1,2)(0,0,1)[12] with drift : Inf
## ARIMA(1,1,2)(1,0,0)[12] : Inf
## ARIMA(1,1,2)(1,0,0)[12] with drift : Inf
## ARIMA(1,1,2)(1,0,1)[12] : 407.5174
## ARIMA(1,1,2)(1,0,1)[12] with drift : Inf
## ARIMA(1,1,3) : 406.1018
## ARIMA(1,1,3) with drift : 406.3112
## ARIMA(1,1,3)(0,0,1)[12] : 407.6612
## ARIMA(1,1,3)(0,0,1)[12] with drift : 408.1681
## ARIMA(1,1,3)(1,0,0)[12] : 407.7696
## ARIMA(1,1,3)(1,0,0)[12] with drift : 408.2879
## ARIMA(1,1,4) : 408.1914
## ARIMA(1,1,4) with drift : 408.8836
## ARIMA(2,1,0) : 401.4498
## ARIMA(2,1,0) with drift : 401.7104
## ARIMA(2,1,0)(0,0,1)[12] : 402.8302
## ARIMA(2,1,0)(0,0,1)[12] with drift : 403.3293
## ARIMA(2,1,0)(1,0,0)[12] : 402.9336
## ARIMA(2,1,0)(1,0,0)[12] with drift : 403.4391
## ARIMA(2,1,0)(1,0,1)[12] : 405.0501
## ARIMA(2,1,0)(1,0,1)[12] with drift : 405.6154
## ARIMA(2,1,1) : 403.755
## ARIMA(2,1,1) with drift : Inf
## ARIMA(2,1,1)(0,0,1)[12] : Inf
## ARIMA(2,1,1)(0,0,1)[12] with drift : Inf
## ARIMA(2,1,1)(1,0,0)[12] : Inf
## ARIMA(2,1,1)(1,0,0)[12] with drift : Inf
## ARIMA(2,1,1)(1,0,1)[12] : Inf
## ARIMA(2,1,1)(1,0,1)[12] with drift : Inf
## ARIMA(2,1,2) : Inf
## ARIMA(2,1,2) with drift : Inf
## ARIMA(2,1,2)(0,0,1)[12] : 407.6881
## ARIMA(2,1,2)(0,0,1)[12] with drift : Inf
## ARIMA(2,1,2)(1,0,0)[12] : 407.7929
## ARIMA(2,1,2)(1,0,0)[12] with drift : Inf
## ARIMA(2,1,3) : Inf
## ARIMA(2,1,3) with drift : Inf
## ARIMA(3,1,0) : 403.7391
## ARIMA(3,1,0) with drift : 403.9652
## ARIMA(3,1,0)(0,0,1)[12] : 405.1997
## ARIMA(3,1,0)(0,0,1)[12] with drift : 405.67
## ARIMA(3,1,0)(1,0,0)[12] : 405.3083
## ARIMA(3,1,0)(1,0,0)[12] with drift : 405.7931
## ARIMA(3,1,0)(1,0,1)[12] : 407.4709
## ARIMA(3,1,0)(1,0,1)[12] with drift : 407.9519
## ARIMA(3,1,1) : 406.1103
## ARIMA(3,1,1) with drift : Inf
## ARIMA(3,1,1)(0,0,1)[12] : 407.6552
## ARIMA(3,1,1)(0,0,1)[12] with drift : Inf
## ARIMA(3,1,1)(1,0,0)[12] : 407.7983
## ARIMA(3,1,1)(1,0,0)[12] with drift : Inf
## ARIMA(3,1,2) : Inf
## ARIMA(3,1,2) with drift : Inf
## ARIMA(4,1,0) : 405.9519
## ARIMA(4,1,0) with drift : 406.4225
## ARIMA(4,1,0)(0,0,1)[12] : 407.4251
## ARIMA(4,1,0)(0,0,1)[12] with drift : 408.1866
## ARIMA(4,1,0)(1,0,0)[12] : 407.5509
## ARIMA(4,1,0)(1,0,0)[12] with drift : 408.317
## ARIMA(4,1,1) : 408.4337
## ARIMA(4,1,1) with drift : 409.0027
## ARIMA(5,1,0) : 408.4215
## ARIMA(5,1,0) with drift : 408.9897
##
##
##
## Best model: ARIMA(0,1,0)
attributes(auto_fit)
## $names
## [1] "coef" "sigma2" "var.coef" "mask" "loglik" "aic"
## [7] "arma" "residuals" "call" "series" "code" "n.cond"
## [13] "nobs" "model" "bic" "aicc" "x" "fitted"
##
## $class
## [1] "forecast_ARIMA" "ARIMA" "Arima"
# ARIMA(0,1,0) diagnostics and forecasts.
# One-step-ahead forecast with a 99.5% prediction interval.
plot(forecast(auto_fit, h = 1, level = c(99.5)))
#10. Perform Residual Analysis for this technique.
#11 Do a plot of residuals. What does the plot indicate?
# Fixed: removed a duplicated copy of this plot call that used curly
# ("smart") quotes, which do not parse in R.
plot(auto_fit$residuals, main = "ARIMA residuals plot")
#Inferences : The plot shows the residuals scattered around a mean of zero.
#12 Do a Histogram plot of residuals. What does the plot indicate?
hist(auto_fit$residuals, main = "Histogram of residuals")
#Inferences : The hist plot shows us that there is not a normally distributed curve.
#13 Do a plot of fitted values vs. residuals. What does the plot indicate?
# Fixed: use the ARIMA model's fitted values and residuals here
# (previously plotted ses_data by copy-paste mistake).
cbind(Fitted = fitted(auto_fit),
Residuals = residuals(auto_fit)) %>%
as.data.frame() %>%
ggplot(aes(x = Fitted, y = Residuals)) + geom_point()
#Inference : The values are random here. The residuals may not be constant.
#14 Do a plot of actual values vs. residuals. What does the plot indicate?
cbind(Actual = gold_time_series,
Residuals = residuals(auto_fit)) %>%
as.data.frame() %>%
ggplot(aes(x = Actual, y = Residuals)) + geom_point()
#Inference : The values are random here. The residuals may not be constant.
#15 Do an ACF plot of the residuals? What does this plot indicate?
# Fixed: object is named auto_fit, not auto.fit (was an undefined-object error).
Acf(auto_fit$residuals, main = "ACF of ARIMA residuals")
#Inferences : There are no lag values crossing the confidence interval.
#16. Print the 5 measures of accuracy for this forecasting technique.
arima_accuracy <- accuracy(auto_fit)
arima_accuracy
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.127021 6.858342 5.447354 0.6537784 3.298368 0.3109553 -0.1310321
#Inferences : The ME and RMSE are not better than the Holt winters one. So this is again 2nd best to HW.
##Forecast
# Fixed: each statement on its own line (two calls were jammed onto one line,
# which does not parse).
forecast(auto_fit, h = 12)
plot(forecast(auto_fit, h = 12))
forecast(auto_fit, h = 24)
plot(forecast(auto_fit, h = 24))
#Summarize this forecasting technique
#How good is the accuracy?
#Inferences : the accuracy is 2nd best to Holt winters.
#What does it predict time series will be in one year and next two years?
#Other observation
# Side-by-side comparison of training-set accuracy for the three methods;
# row 1 of each accuracy() matrix is the training set.
accuracy_table <- data.frame(
Method = c("Naive", "Simple Smoothing", "Holt-Winters"),
ME = c(naive_accuracy[1, "ME"], ses_accuracy[1, "ME"], hw_accuracy[1, "ME"]),
MAE = c(naive_accuracy[1, "MAE"], ses_accuracy[1, "MAE"], hw_accuracy[1, "MAE"]),
MASE = c(naive_accuracy[1, "MASE"], ses_accuracy[1, "MASE"], hw_accuracy[1, "MASE"]),
RMSE = c(naive_accuracy[1, "RMSE"], ses_accuracy[1, "RMSE"], hw_accuracy[1, "RMSE"]),
MAPE = c(naive_accuracy[1, "MAPE"], ses_accuracy[1, "MAPE"], hw_accuracy[1, "MAPE"])
)
print(accuracy_table)
## Method ME MAE MASE RMSE MAPE
## 1 Naive 1.144068 5.537627 0.3161084 6.916201 3.352577
## 2 Simple Smoothing 1.234966 5.484528 0.3130774 6.822766 3.319371
## 3 Holt-Winters -0.403236 4.862737 0.2775832 6.139429 2.996916
##Conclusion
#1. Summarize your analysis of time series value over the time-period.
#The data does have trend and seasonality and we found the same with Acf plots. From the forecasting methods naive, simple smoothing, and HoltWinters, we can see that HoltWinters forecast gives us the best forecasting method in this case. #Forecast is good because the error values are quite low for HoltWinters. #Residuals in HoltWinters appear to be random and the ACF values of residuals are inside the confidence intervals. Naive is the worst since it does not have better forecasts as compared to the rest.
#2. Based on your analysis and forecast above, do you think the value of the time series will increase, decrease or stay flat over the next year? How about next 2 years?
#the time series will increase over the next & next 2 years.